;******************************************
; plot time serie of temperature anoamaly and HW frequency 
;********************************************
load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/gsn_code.ncl"
load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/gsn_csm.ncl"
load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/contributed.ncl"
;********************************************
begin

;set input

 dirPath0 = ".../NFood_replication/data/" ; change to local path of your directory

 ;target variables for analysis
 ;CROPPROD1C: "1-yr grain product C (gridcell)"
 ;CROPPROD1C_HARVEST:long_name = "crop harvest flux to 1-yr grain product pool (gridcell)"
 ;CROPPROD1C_HARV_CFT:long_name = "crop harvest flux to 1-yr grain product pool for each crop (pft)"
 var = "CROPPROD1C_HARV_CFT" ; harvest flux per crop type [time, cft, lat, lon]
 nvar = dimsizes(var);

 ;cases and years
 casename =(/"IRCP45Clm50BgcCropGs_cplhist_1deg_CNTL","IRCP45Clm50BgcCropGs_cplhist_1deg_RCP85LU","IRCP85Clm50BgcCropGs_cplhist_1deg_CNTL","IRCP85Clm50BgcCropGs_cplhist_1deg_RCP45CO2", "IRCP85Clm50BgcCropGs_cplhist_1deg_SAI01","IRCP85Clm50BgcCropGs_cplhist_1deg_MSB01","IRCP85Clm50BgcCropGs_cplhist_1deg_CCT01"/)
 ;case = (/"RCP45", "RCP85", "RCP85_SAI", "RCP85_MSB", "RCP85_CCT"/);
 case = (/"RCP45","RCP45_85LU","RCP85","RCP85_45CO2","RCP85_SAI","RCP85_MSB","RCP85_CCT"/)
 ncase = dimsizes(case);
 syr = 2006;
 fyr = 2100;
 csyr = sprinti("%0.4i",syr);
 cfyr = sprinti("%0.4i",fyr);
 nyr = fyr - syr + 1;

;read dimension
 fdim = addfile(dirPath0+"/land_cover/RCP85.1deg.landarea.nc", "r");
 lat = fdim->lat
 lon = fdim->lon
 nlat = dimsizes(lat);
 nlon = dimsizes(lon);
 dims = (/nlat,nlon/);
 rad    = 4.0*atan(1.0)/180.0
 clat   = cos(lat*rad); used clat to estimate global average flux when the area (dy * dx) represented by each grid point is a function of latitude
 clat!0 = "lat"
 clat&lat = lat

 ;Or directly use the explicit area and land fraction variables for regional sum and average
 garea = fdim->area  ;sqrkm
 lfrac = fdim->landfrac
 dydx = garea*lfrac*1e6 ;; sqrkm to sqrm
 wgt  = dydx  ;wgt[size (nlat, nlon)]

 ;name coordinates for wgt, for limited area sums
 wgt!0   = "lat"
 wgt!1   = "lon"
 wgt&lat =  lat
 wgt&lon =  lon
 ;usage: input q(time, lat, lon) [size: (ntim, nlat, nlon)]
 ;wgt[lat, lon]: two-dimensional array corresponding to the rightmost dimensions of q
 ;output qSum = wgt_areasum2(q, wgt, opt)  ; => qSum(ntim)



 ;Eight active crops in CLM5:
 ; 17/18=Temperate Corn/irrigated, 19/20=Spring Wheat/irrigated, 23/24=Temperate Soybean/irrigated, 41/42=Cotton/Cotton_irrigated,
 ; 61/62=Rice/Rice_irrigated, 67/68=Sugarcane/Sugarcane_irrigated, 75/76=Tropical Corn/irrigated, 77/78=Tropical Soybean/irrigated
 ;Winter wheat (21/22) has not been implemented yet. Only Spring/summer wheat
 ;cft_c3_crop = 1; cft_temperate_corn = 3; cft_spring_wheat = 5 ... cft sequence in crop landunit
 pfts = (/17,18, 19,20, 23,24, 41,42, 61,62, 67,68, 75,76, 77,78 /)      ; crop index among all pfts
 cfts = (/17,18, 19,20, 23,24, 41,42, 61,62, 67,68, 75,76, 77,78 /)-15   ; cft data index
 ncft = dimsizes(cfts);


;data frame for barplot
 totyd_sum = new((/ncft,ncase,nyr/),"double"); 3d array 
 totyd_avg = new((/ncft,ncase,nyr/),"double"); 3d array
 totyd_avg_60lat = new((/ncft,ncase,nyr/),"double");
 tot_area_cft = new((/ncft,ncase,nyr/),"double"); ha/year



;read data for all cases
do kk=0,ncase-1

   ;these variables should be declared and deleted for each case to avoid using old memory
   cft_area  = new((/nyr,ncft,nlat,nlon/),"double"); 4d array
   yield_avg = new((/nyr,ncft,nlat,nlon/),"double"); 4d array
   yield_sum = new((/nyr,ncft,nlat,nlon/),"double"); 4d array


   ;read crop land cover info from fsurfdata
   ;NOTE: these landuse.timeseries files are very large (24 GB each), and can be downloaded from the NCAR svn site: https://svn-ccsm-inputdata.cgd.ucar.edu/trunk/inputdata/lnd/clm2/surfdata_map/
   if (kk .lt. 1) then; ;only for RCP45
     fsurf := addfile("/cluster/shared/noresm/inputdata/lnd/clm2/surfdata_map/landuse.timeseries_0.9x1.25_SSP2-4.5_78pfts_CMIP6_simyr1850-2100_c190102.nc", "r")
   else ; for RCP85/SAI/MSB/CCT, and RCP45_85LU, RCP85_45CO2
     fsurf := addfile("/cluster/shared/noresm/inputdata/lnd/clm2/surfdata_map/landuse.timeseries_0.9x1.25_SSP5-8.5_78pfts_CMIP6_simyr1850-2100_c181209.nc", "r")
   end if
   fcrop:= fsurf->PCT_CROP ; total percent crop landunit of a gridcell; years 1850-2100; [time | 251] x [lat | 192] x [lon | 288]
   fcft := fsurf->PCT_CFT  ; percent crop functional type on the crop landunit (% of landunit); [time | 251] x [cft | 64] x [lat | 192] x [lon | 288]
   ;use crop specific harvest flux and the crop area to estimate crop yield for each active CFT
   ;most crops are within lat[-60:60]

   ;read crop yield data
   if (kk .lt. 4) then; ;RCP45 and RCP85
     ;unzip the files under .../NFood_replication/data/yield_data/rawdata/CROPPROD1C_HARV_CFT.zip
     fi := addfile(dirPath0+"/yield_data/rawdata/CROPPROD1C_HARV_CFT."+case(kk)+".1deg.clm5.h0.2006-2100.nc", "r");
     dat0 := fi->$var$ ;monthly data
     ;print(dat0) ; [time | 1140] x [cft | 64] x [lat | 192] x [lon | 288]
     datyr := month_to_annual(dat0, 1); annual mean [year | 95] x [cft | 64] x [lat | 192] x [lon | 288]
     ;datyr := dat0yr(0:94,:,:) ;make sure datyr and fcrop0 has the same dimension
     fcrop0 := fcrop(156:250,:,:)/100 ;fraction of all crop area 2006-2100
     fcft0  := fcft(156:250,:,:,:)/100 ;fraction of each cft
     copy_VarCoords(datyr,fcft0)
   else
     fi := addfile(dirPath0+"/yield_data/rawdata/CROPPROD1C_HARV_CFT."+case(kk)+".1deg.clm5.h0.2020-2100.nc", "r");
     dat0 := fi->$var$ ; [time | 972] x [cft | 64] x [lat | 192] x [lon | 288]
     datyr := month_to_annual(dat0, 1); annual mean flux [year | 81] x [cft | 64] x[lat | 192] x [lon | 288]
     fcrop0 := fcrop(170:250,:,:)/100 ;fraction for all crop 2020-2100 [time | 81] x [lat | 192] x [lon | 288]
     fcft0  := fcft(170:250,:,:,:)/100 ;fraction of each cft [time | 81] x [cft | 64] x [lat | 192] x [lon | 288]
     copy_VarCoords(datyr,fcft0)
   end if
   delete(dat0) ; needs to remove the memory to reuse a high dimensional variable
   printVarSummary(datyr)

   ;read data for each cft
   do jj=0,ncft-1  
     ;area weighted for each crop

    ;first calculate actual crop area in each grid cell per crop [nyr,ncft,nlat,nlon]
     wgt3d := conform_dims(dimsizes(fcrop0),wgt,(/1,2/)) ; conform wgt to 3d: [year | 95 or 81] x[lat | 192] x [lon | 288]
     copy_VarCoords(datyr(:,cfts(jj),:,:), wgt3d)

     ;save annual crop planted area per cft per grid; wgt is the land area per grid (static), but fcrop0 and fcft0 are dynamic
     crop_area := wgt3d*fcrop0*fcft0(:,cfts(jj),:,:) ; area per crop per year for each grid cell
     copy_VarCoords(datyr(:,cfts(jj),:,:),crop_area)
     ;crop_area@_FillValue = 0 ;to avoid division by 0, set 0 values to nondata
     tot_cft_area := dim_sum_n(crop_area, (/1,2/)) ;sqrm; total area per crop per year over all lat/lon
     
     ;================
     ;NOTE: CROPPROD1C_HARV_CFT [time, cft, lat, lon] is averaged flux computed by dividing per crop harvest amount (gC/s) with crop area (m2) in each grid
     ;thus it already considers pft and cft fractions
     ;that is why some crops have much higher flux than others, and some grid cells have higher flux than other cells
     ;To get global total area weighted sum with wgt_areasum2, the specific crop area in each grid cell
     ;must be used as the weight so that flux gC/m2/s can be converted back to harvest amount gC/s in calculating global sum
     ;To get global average flux, the total harvest amount per crop should be divided by total crop area of each cft
     ;for reference, in reality corn yield is about 9 ton/ha and cotton 2 ton/ha; per ha yield should not vary significantly across grid cells

     ;==================
     ; weighted area sum/avg to get global harvest per crop
     cftY   := datyr(:,cfts(jj),:,:)*fcrop0*fcft0(:,cfts(jj),:,:) ;gC/m2/s; intermediate variable (do not use it as crop flux)
     copy_VarCoords(datyr(:,cfts(jj),:,:),cftY) ;add coordinates after calculation
     ;the fraction weight by fcrop0*fcft0 will pass to the grid land area 'wgt' to get crop area specific weight for each cft in each year
     ;wgt is sqrm land area of each gridcell (static), fcrop0*fcft0 is the dynamic crop fraction weight
     ;cftT   := wgt_areasum2(cftY(:,{-60:60},:), wgt({-60:60}, :), 0); gC/s per cft, sum over lat -60:60
     cftT0   := wgt_areasum2(cftY(:,:,:), wgt(:,:), 0); gC/s per cft, sum over globe

     ;alternative method, calculate with crop specific area weight crop_area and dim_sum_n over the [lat,lon] dims
     cftT  := dim_sum_n(datyr(:,cfts(jj),:,:)*crop_area, (/1,2/)) ;global total yield per crop
     ;cftT should be equal to cftT0
     ;print(cftT)
     ;print(cftT0)
     ;change unit to annual total yield (change unit later)
     cftT := cftT*86400*365*1.0e-12 ;global annual area sum flux gC/s to MtC/year

     ;=================
     ;save global averaged annual yield per crop (tonnes ha-1 year-1); 
     ;there are 3 methods to compute area average flux: (a) explicitly use the area of each grid cell; (b) uses only gaussian weights; (c) use the cosine of the latitudes

     ;Correct method: use explicit area of each crop in each grid cell (most crops within lat -60:60)
     ;calculate by cftA=cftT/tot_cft_area, (wgt_areaave2 only take 2d weight, and dim_avg_n only do arithmetic mean, avoid them)   
     cftA       := dim_sum_n(datyr(:,cfts(jj),:,:)*crop_area, (/1,2/))/tot_cft_area ;same as cftT/tot_cft_area
     cftA_60lat := dim_sum_n(datyr(:,cfts(jj),{-60:60},:)*crop_area(:,{-60:60},:), (/1,2/))/tot_cft_area ; gC/m2/s, average within lat -60:60 
     ;the original grid average flux datyr is computed by dividing crop specific harvest C amount with crop specific area in each grid 
     cftA := cftA*86400*365*10000*1.0e-6; global average flux gC/m2/s to tonnes/ha/year
     cftA_60lat := cftA_60lat*86400*365*10000*1.0e-6
     ;save cftA to totyd_avg [ncft,ncase,nyr]

     ;method 2(wrong): use grid cell area weight
     ;cftA       := wgt_areaave2(cftY(:,:,:), wgt(:,:), 0); gC/m2/s per cft, (wrong method!)
     ;note above problem is that wgt is total land area of each grid, although wgt_areaave2 will get the correct sum harvest amount for the dividend,
     ;the divisor for the areaave2 will be global total land area which is wrong! The divisor should be grid specific crop area for each cft.

     ;method 3: use cosine weigth "clat"; (wrong, same problem as using wgt)
     ;cftA := wgt_areaave(datyr(:,cfts(jj),{-60:60},:), clat({-60:60}), 1.0, 0); use grid average flux
     ;;cftA := wgt_areaave(cftY(:,{-60:60},:), clat({-60:60}), 1.0, 0);  cftY will pass the crop weight fcrop0*fcft0 to clat    
     ;use 1d clat only when the area of each grid point(dy * dx) is a function of latitude; but here we have explicit grid land area wgt

     ;=====================
     ;save data for ploting
     if (kk .lt. 4) then; ;RCP45 and RCP85 2006-2100
       tot_area_cft(jj,kk,:) = (tot_cft_area/10000*1.0e-6); sqrm to Mha
       totyd_sum(jj,kk,:) = (cftT)   ; MtC/year 
       totyd_avg(jj,kk,:) = (cftA)   ; tonnes/ha/year
       totyd_avg_60lat(jj,kk,:) = (cftA_60lat)  ; tonnes/ha/year within -60 ~ 60 latitude
       ;per grid data
       yield_sum(:,jj,:,:) = (datyr(:,cfts(jj),:,:)*crop_area*86400*365*1.0e-12) ; MtC/year in each grid
       yield_avg(:,jj,:,:) = (datyr(:,cfts(jj),:,:)*86400*365*10000*1.0e-6) ; gC/m2/s to tonnes/ha/year
       cft_area(:,jj,:,:)  = (crop_area/10000) ;sqrm to ha 

     else ;SAI/MSB/CCT 2020-2100
       tot_area_cft(jj,kk,14:94) = (tot_cft_area/10000*1.0e-6) ; Mha/year global
       totyd_sum(jj,kk,14:94) = cftT ; MtC/year global total per crop
       totyd_avg(jj,kk,14:94) = (cftA) ; global yield flux per crop: tonnes/ha/year
       totyd_avg_60lat(jj,kk,14:94) = (cftA_60lat)  ; tonnes/ha/year within -60 ~ 60 latitude

       yield_sum(14:94,jj,:,:) = (datyr(:,cfts(jj),:,:)*crop_area*86400*365*1.0e-12) ;MtC/year per crop for each grid
       yield_avg(14:94,jj,:,:) = (datyr(:,cfts(jj),:,:)*86400*365*10000*1.0e-6) ; gC/m2/s to tonnes/ha/year
       ;datyr = CROPPROD1C_HARV_CFT [time, cft, lat, lon] is per crop flux that already takes into account crop fraction
       ;for global map of yield flux directly show the orginal flux (convert to tonne/ha/year) for one crop at a time
       cft_area(14:94,jj,:,:)  = (crop_area/10000) ;sqrm to ha
     end if


   end do ;ncft
   delete([/datyr, fcrop0, fcft0, cftY/])

   dirPath1=dirPath0+"/yield_data"

   ;save yield data to binary for each case
   ;area sum yield for each crop: tonneC/year [nyr,ncft,nlat,nlon]
   filename1 = dirPath1+"/"+case(kk)+"_yield_sum_MtC_nyr.ncft.lat.lon"
   system("rm -rf " + filename1)
   fbindirwrite(filename1, yield_sum);

   ;area average yield for each crop: tonnes/ha/year [nyr,ncft,nlat,nlon]
   filename2 = dirPath1+"/"+case(kk)+"_yield_avg_tC-ha_nyr.ncft.lat.lon"
   system("rm -rf " + filename2)
   fbindirwrite(filename2, yield_avg);

   ;cultivation area for each crop: [nyr,ncft,nlat,nlon] 
   filename22 = dirPath1+"/"+case(kk)+"_crop_area_ha_nyr.ncft.lat.lon"
   system("rm -rf " + filename22)
   fbindirwrite(filename22, cft_area);

 end do ;ncase
 delete(fcft)
 delete(yield_sum); important to delete it and renew for each case
 delete(yield_avg)
 delete(cft_area)

 ;save data to binary for backup
 ;Global Total Annual Yield of each crop: MtC/year
 filename3 = dirPath1+"/"+"YieldTot_cft_case_MtC_year"
 system("rm -rf " + filename3)
 fbindirwrite(filename3, totyd_sum); [ncft,ncase,nyr] 
 ; fbindirwrite(dirPath0+"/AnnualCropYield.bin", datcsv); 

 ;Global average yield flux of each crop: tonne/ha/year
 filename4 = dirPath1+"/"+"YieldAvg_cft_case_tonne_ha_year"
 system("rm -rf " + filename4)
 fbindirwrite(filename4, totyd_avg); [ncft,ncase,nyr]

 filename6 = dirPath1+"/"+"YieldAvg_60lat_cft_case_tonne_ha_year"
 system("rm -rf " + filename6)
 fbindirwrite(filename6, totyd_avg_60lat); [ncft,ncase,nyr]

 ;Global crop area per cft per case for each year: Million hectare
 filename5 = dirPath1+"/"+"TotArea_cft_case_year_Mha"
 system("rm -rf " + filename5)
 fbindirwrite(filename5, tot_area_cft); [ncft,ncase,nyr]



;below save data to csv files
;*************************************

;change units get yearly crop yield for each cft
; wrk1(:,:,:) = wrk(:,:,:) * 86400*365*1.0e-12 ;gC/s to MtC/s to MtC/year 
; datcsv := datcsv(:,:,:) * 86400*365*1.0e-12 ;annual mean flux gC/s to MtC/s to MtC/year
;need to define a new variable on the left to use subscript, otherwise ERROR:"datcsv is undefined, can not subscript an undefined variable"
;Or directly change unit in the above loop.


 ;pfts = (/15, 17, 19, 21, 23, 41, 61,62, 67,68, 75, 77/)      ; crop index among all pfts
 pfts = (/17,18, 19,20, 23,24, 41,42, 61,62, 67,68, 75,76, 77,78 /) ; 8 active crops, non-irrigated and irrigated
 cftname = (/ "Temperate Corn",  "Irrigated Temperate Corn", "Spring Wheat", "Irrigated Spring Wheat",  "Temperate Soybean", "Irrigated Temperate Soybean", "Cotton", "Irrigated Cotton", \
              "Rice", "Irrigated Rice", "Sugarcane", "Irrigated Sugarcane", "Tropical Corn", "Irrigated Tropical Corn", "Tropical Soybean", "Irrigated Tropical Soybean" /)
 unit    = (/ "MtC/year" /)


;Save data to csv

  csv_file1 = dirPath1+"/cropyield_annual.csv"
  system("rm -rf " + csv_file1)


; Make the 1D dims the same length as the total length of 3D [ncft,ncase,nyr],
; so we can write them to CSV file along with data. 

; for all years 2006:2100
  dims0 = dimsizes(totyd_sum) ; [ncft,ncase,nyr]
  pft1d  = ndtooned(conform_dims(dims0,pfts,0)) ;this does not work
  cftname1d  = ndtooned(conform_dims(dims0,cftname,0))
  case1d = ndtooned(conform_dims(dims0,case, 1))
  time1d = ndtooned(conform_dims(dims0,ispan(2006,2100,1), 2))



;---Write header to CSV file.
  
 ; field_names2 = (/"PFT", "Crop name", "Case", "Year", "Yield [MtC/year]" /) ; (ncft,ncase,nyr,data)
  field_names1 = (/"PFT", "Crop name", "Case", "Year", "Yield [MtC/year]", "Area [Mha]"/) ; 
  header1 = [/str_join(field_names1,",")/]
  write_table(csv_file1, "w", header1, "%s")

;Write crop data to csv file
 ;---convert 3D array to 1D for writing to CSV
 totyd_sum_1d = ndtooned(totyd_sum)
 tot_area_cft_1d = ndtooned(tot_area_cft)

 alist1  = [/pft1d,cftname1d,case1d,time1d, totyd_sum_1d, tot_area_cft_1d/]
 ;format = "%g,%g,%g,%g,%g"
 format1 = "%2d,%s,%s,%4d,%g,%g"

 write_table(csv_file1, "a", alist1, format1)


end
